---
title: "Cleaning European Values Study"
---

# Load

```{r}
# load packages (pulled in by the shared helper script)
  source(file = "helper-packages.R")

# read the EVS trend file (1981-2017) from its .dta file
  evs_1981_2017_raw <- read_dta(
    file = "../raw-data/y-multi-evs/ZA7503_v2-0-0.dta"
  )
```

# Clean Wave 1 (1981)

- NOT PROCESSED:
  - REASONS:
    - No question on religious tolerance

# Clean Wave 2 (1990)

- NOT PROCESSED:
  - REASONS:
    - No question on religious tolerance

# Clean Wave 3 (1999)

- NOT PROCESSED:
  - REASONS:
    - No question on religious tolerance

# Clean Wave 4 (2008)

- NOT PROCESSED:
  - REASONS:
    - No question on religious tolerance

# Clean Wave 5 (2017)

```{r}
# clean
# Builds `clean_evs5`: keeps the rows of the trend file with S020 == 2017,
# derives the harmonised `resp_*` columns below, and then drops every column
# whose name does not start with "resp_" (including the religion_* helpers).
# Negative codes (-5 to -1) are treated as missing throughout.
# NOTE(review): the filter keys on S020 rather than on a wave identifier; if
# any wave-5 country is recorded under a later S020 value it is excluded
# here -- confirm this is intended.
  clean_evs5 <-
    evs_1981_2017_raw %>%
    filter(S020 == 2017) %>%
    mutate(

    #########################
    ####### META-DATA #######
    #########################

      # source name (character vector, title case)
        resp_source = "European Values Study",

      # round number (character vector, title case)
        resp_round = "Wave 5",

      # url to dataset source, where publicly available (character vector)
        resp_original_data_url = "bit.ly/3lRIBdc",

      # survey mode (in-person/phone/internet)
        resp_survey_mode =
          case_when(
            mm_mixed_mode_EVS5 %in% 1:4 ~ "internet/mail", # this is the best categorization we can back out
            mm_mixed_mode_EVS5 == 5 ~ "in-person",
            TRUE ~ NA_character_),

      # country (character vector; list of countries as written in original source)
      # codes without an entry below are passed through unchanged by recode()
        resp_country_original =
          dplyr::recode(
            as.character(S009),
            "AL" = "Albania",
            "AM" = "Armenia",
            "AT" = "Austria",
            "BA" = "Bosnia and Herzegovina",
            "BE" = "Belgium",
            "BG" = "Bulgaria",
            "BY" = "Belarus",
            "CA" = "Canada",
            "CH" = "Switzerland",
            "CY" = "Cyprus",
            "CZ" = "Czechia",
            "DE" = "Germany",
            "DK" = "Denmark",
            "EE" = "Estonia",
            "ES" = "Spain",
            "FI" = "Finland",
            "FR" = "France",
            "GB-GBN" = "Great Britain",
            "GE" = "Georgia",
            "GR" = "Greece",
            "HR" = "Croatia",
            "HU" = "Hungary",
            "IE" = "Ireland",
            "IS" = "Iceland",
            "IT" = "Italy",
            "RS-KM" = "Kosovo",
            "LT" = "Lithuania",
            "LU" = "Luxembourg",
            "LV" = "Latvia",
            "MD" = "Moldova, Rep.of",
            "ME" = "Montenegro",
            "MK" = "North Macedonia",
            "MT" = "Malta",
            "CY-TCC" = "Northern Cyprus",
            "GB-NIR" = "Northern Ireland",
            "NL" = "Netherlands",
            "NO" = "Norway",
            "PL" = "Poland",
            "PT" = "Portugal",
            "RS" = "Serbia",
            "RO" = "Romania",
            "RU" = "Russia",
            "SE" = "Sweden",
            "SI" = "Slovenia",
            "SK" = "SlovakRepublic",
            "TR" = "Turkey",
            "UA" = "Ukraine",
            "US" = "United States"),

      # country (character vector; converts to countrycode country.name list)
        resp_country_common =
          countryname(resp_country_original),

      # interview date (variable of class Date; if only month given, input 1st of month)
      # significant numbers of missing dates in denmark and finland
      # https://europeanvaluesstudy.eu/methodology-data-documentation/survey-2017/full-release-evs2017/participating-countries-and-country-information-survey-2017/
      # values of S012 that do not parse as YYYYMMDD become NA
        resp_interview_date = as.Date(as.character(S012), format = "%Y%m%d"),

      # fieldwork start date, filled in only for the countries listed; NA elsewhere
        resp_interview_start_date =
          case_when(
            resp_country_common == "Denmark" & resp_survey_mode == "internet/mail" ~ as.Date("2017-12-11"),
            resp_country_common == "Denmark" & resp_survey_mode == "in-person" ~ as.Date("2017-09-27"),
            resp_country_common == "Finland" & resp_survey_mode == "internet/mail" ~ as.Date("2017-11-01"),
            resp_country_common == "Finland" & resp_survey_mode == "in-person" ~ as.Date("2017-11-24"),
            resp_country_common == "Iceland" ~ as.Date("2017-06-20"), #imputed using empirical max/min dates for this country
            resp_country_common == "Sweden" ~ as.Date("2017-09-27"), #imputed using empirical max/min dates for this country
            resp_country_common == "Switzerland" ~ as.Date("2017-09-11"), #imputed using empirical max/min dates for this country
            TRUE ~ NA_Date_),

      # fieldwork end date, filled in only for the countries listed; NA elsewhere
        resp_interview_end_date =
          case_when(
            resp_country_common == "Denmark" & resp_survey_mode == "internet/mail" ~ as.Date("2018-01-31"),
            resp_country_common == "Denmark" & resp_survey_mode == "in-person" ~ as.Date("2018-01-31"),
            resp_country_common == "Finland" & resp_survey_mode == "internet/mail" ~ as.Date("2018-06-01"),
            resp_country_common == "Finland" & resp_survey_mode == "in-person" ~ as.Date("2018-07-10"),
            resp_country_common == "Iceland" ~ as.Date("2018-03-28"), #imputed using empirical max/min dates for this country
            resp_country_common == "Sweden" ~ as.Date("2018-06-03"), #imputed using empirical max/min dates for this country
            resp_country_common == "Switzerland" ~ as.Date("2018-02-22"), #imputed using empirical max/min dates for this country
            TRUE ~ NA_Date_),

    #########################
    ##### DEMOGRAPHICS ######
    #########################

    # trim religion variable F025_EVS to uniform values
    # (drops the first four characters of each value label -- presumably a
    # fixed-width prefix; verify against the codebook)
        religion_character = to_character(F025_EVS),
        religion_trim = substr(religion_character, 5, nchar(religion_character)),

    # respondent's religion (character vector that corresponds to master list)
    # any label not listed below maps to NA
        resp_religion = case_when(
          religion_trim == "Muslim" ~ "Muslim",
          religion_trim == "Orthodox" ~ "Christian",
          religion_trim == "Other" ~ "Other religion",
          religion_trim == "Protestant" ~ "Christian",
          religion_trim == "Catholic" ~ "Christian",
          religion_trim == "The Roman Catholic Church" ~ "Christian",
          religion_trim == "The Evangelic Church" ~ "Christian",
          religion_trim == "Other, please specify (Write in)" ~ "Other religion",
          religion_trim == "Jehovah’s Witnesses" ~ "Christian",
          religion_trim == "The Serbian Orthodox Church" ~ "Christian",
          religion_trim == "Muslim (Islamic Community in Croatia)" ~ "Muslim",
          religion_trim == "The Greek-Catholic Church" ~ "Christian",
          religion_trim == "The Jewish Community of Zagreb" ~ "Jewish",
          religion_trim == "Union of Baptists Churches in the Republic of Croatian" ~ "Christian",
          religion_trim == "The evangelical Church in Croatia (Pentecostal)" ~ "Christian",
          religion_trim == "Catholic (Roman)" ~ "Christian",
          religion_trim == "Other Church" ~ "Christian",
          religion_trim == "Evangelical Church of Czech Brethren" ~ "Christian",
          religion_trim == "Evangelic (Hussite Church)" ~ "Christian",
          religion_trim == "Orthodox Christian religion" ~ "Christian",
          religion_trim == "Jehovah's Witnesses" ~ "Christian",
          religion_trim == "Greek Catholic" ~ "Christian",
          religion_trim == "Other than Christian" ~ "Other religion",
          religion_trim == "Evangelical A.V. (Lutheran)" ~ "Christian",
          religion_trim == "Jewish" ~ "Jewish",
          religion_trim == "Evangelical Lutheran Church of Denmark" ~ "Christian",
          religion_trim == "Buddhist" ~ "Buddhist",
          religion_trim == "Evangelical Lutheran Church" ~ "Christian",
          religion_trim == "Orthodox Church" ~ "Christian",
          religion_trim == "Pentecostal Church" ~ "Christian",
          religion_trim == "Jehovah's Witness" ~ "Christian",
          religion_trim == "Free Church" ~ "Christian",
          religion_trim == "Protestant Church without Free Churches" ~ "Christian",
          religion_trim == "Roman Catholic Church" ~ "Christian",
          religion_trim == "Russian-Orthodox Church" ~ "Christian",
          religion_trim == "Protestant Free Churches" ~ "Christian",
          religion_trim == "Greek-Orthodox Church" ~ "Christian",
          religion_trim == "Asa Faith Society" ~ "Other religion",
          religion_trim == "Other Christian" ~ "Christian",
          religion_trim == "Lutheran Church of Iceland" ~ "Christian",
          religion_trim == "Reykjavik Independent Church" ~ "Christian",
          religion_trim == "Hafnarfjörður Free Church" ~ "Christian",
          religion_trim == "Reykjavik Free Church" ~ "Christian",
          religion_trim == "The Way, Free Church" ~ "Christian",
          religion_trim == "Seventh day Adventists" ~ "Christian",
          religion_trim == "Other Non Christian" ~ "Other religion",
          religion_trim == "Zuism" ~ "Other religion",
          religion_trim == "Buddhist Association of Iceland" ~ "Buddhist",
          religion_trim == "The Cross" ~ "Christian",
          religion_trim == "Other, which?" ~ "Other religion",
          religion_trim == "Protestantse Kerk Nederland (voorheen Gereformeerd)" ~ "Christian",
          religion_trim == "Protestantse Kerk Nederland (voorheen Hervormd)" ~ "Christian",
          religion_trim == "Overig Protestant" ~ "Christian",
          religion_trim == "Rooms Katholiek" ~ "Christian",
          religion_trim == "Islam" ~ "Muslim",
          religion_trim == "Protestantse Kerk Nederland (niet nader gespecificeerd)" ~ "Christian",
          religion_trim == "Hindoeïsme" ~ "Hindu",
          religion_trim == "Protestantse Kerk Nederland (voorheen Luthers)" ~ "Christian",
          religion_trim == "Roman-catholic" ~ "Christian",
          religion_trim == "Evangelical" ~ "Christian",
          religion_trim == "Orthodoxy" ~ "Christian",
          religion_trim == "Buddhism" ~ "Buddhist",
          religion_trim == "Protestantism" ~ "Christian",
          religion_trim == "Judaism" ~ "Jewish",
          religion_trim == "Catholicism" ~ "Christian",
          religion_trim == "Hinduism" ~ "Hindu",
          religion_trim == "Roman catholic" ~ "Christian",
          religion_trim == "Lutheran" ~ "Christian",
          religion_trim == "Calvinist (Reform protestant)" ~ "Christian",
          religion_trim == "Other Protestant" ~ "Christian",
          religion_trim == "Other christian" ~ "Christian",
          religion_trim == "Christian - no denomination" ~ "Christian",
          religion_trim == "Islam / Muslim" ~ "Muslim",
          religion_trim == "Protestant (no further detail)" ~ "Christian",
          religion_trim == "Roman Catholic" ~ "Christian",
          religion_trim == "Greek or Russian Orthodox" ~ "Christian",
          religion_trim == "Baptist" ~ "Christian",
          religion_trim == "Hindu" ~ "Hindu",
          religion_trim == "Swedish Church" ~ "Christian",
          religion_trim == "Jewish congregation" ~ "Jewish",
          religion_trim == "Any independent church, Protestant (not Swedish Church)" ~ "Christian",
          religion_trim == "Catholic Church" ~ "Christian",
          religion_trim == "An Orthodox church or congregation (Russian, Greek or other)" ~ "Christian",
          religion_trim == "Islamic community" ~ "Muslim",
          religion_trim == "Protestant reformed" ~ "Christian",
          religion_trim == "Christian Catholic" ~ "Christian",
          religion_trim == "Orthodox (greek, russian Church)" ~ "Christian",
          religion_trim == "Free evangelical churches" ~ "Christian",
          religion_trim == "Islamic" ~ "Muslim",
          TRUE ~ NA_character_),

    # respondent's denomination (character vector that corresponds to master list)
    # every negative code is set to NA first, matching how -5 to -1 are handled
    # for the other variables in this chunk; without this guard only the three
    # label fragments below were caught and any other negative code leaked
    # through as truncated label text
        resp_denomination = case_when(
          as.numeric(F025_EVS) < 0 ~ NA_character_,
          # what is left of the missing-value labels after the 4-character trim
          religion_trim == "applicable" ~ NA_character_,
          religion_trim == "nswer" ~ NA_character_,
          religion_trim == "t know" ~ NA_character_,
          religion_trim == "Sidmennt (the Icelandic Ethical Humanist Association)" ~ NA_character_,
          TRUE ~ religion_trim),

      # respondent's age (character vector; bins denoted by single dash ["18-25"])
      # negative codes become NA; 82 is relabelled "82+"; other ages kept as-is
        resp_age =
          dplyr::recode(
            as.character(X003),
            "-5" = NA_character_,
            "-4" = NA_character_,
            "-3" = NA_character_,
            "-2" = NA_character_,
            "-1" = NA_character_,
            "82" = "82+"),

      # respondent's education level
      # NOTE(review): codes 2-7 all carry the bracketed tag [Primary], including
      # the secondary and "some university" levels -- confirm the tags are
      # intended before relying on them downstream
        resp_education_original =
          dplyr::recode(
            as.character(X025),
            "1" = "1. Inadequately completed elementary education [No education]",
            "2" = "2. Completed (compulsory) elementary education [Primary]",
            "3" = "3. Incomplete secondary school: technical/vocational type [Primary]",
            "4" = "4. Complete secondary school: technical/vocationaltype/secondary [Primary]",
            "5" = "5. Incomplete secondary: university-preparatory type/secondary [Primary]",
            "6" = "6. Complete secondary: university-preparatory type/full secondary [Primary]",
            "7" = "7. Some university without degree/highereducation - lower-level tertiary [Primary]",
            "8" = "8. University with degree/higher education - upper-level tertiary [College]",
            .default = NA_character_),

      # respondent's gender (numeric: female = 1; male = 0; other = NA)
        resp_female =
          case_when(
            X001 == 1 ~ 0,
            X001 == 2 ~ 1,
            TRUE ~ NA_real_),

      # respondent resident in rural (vs urban) area (numeric: rural = 1; urban/semi-urban/peri-urban = 0)
      # derived from X049a: codes 1-2 = rural, codes 3-5 = not rural, anything else = NA
        resp_rural =
          case_when(
            X049a %in% 1:2 ~ 1,
            X049a %in% 3:5 ~ 0,
            TRUE ~ NA_real_),

    #########################
    ### SOCIAL DISTANCE 1 ###
    #########################

      # original question number; question text; response options (input above)
        resp_soc_dist_1_qinfo = "NUM: G007.35.B; QTEXT:  I would like to ask you how much you trust people from various groups. Could you tell me for each whether you trust people from this group completely, somewhat, not very much or not at all? People of another religion.; ROPTIONS: 1 = Trust completely [=0] + 2 = Trust somewhat [=0] + 3 = Do not trust very much [=1] + 4 = Do not trust at all [=1]; TARGET: Different religion; TYPE: Trust",

      # original response (as character vector)
        resp_soc_dist_1_original =
          dplyr::recode(
            as.character(G007_35_B),
            "-5" = NA_character_,
            "-4" = NA_character_,
            "-3" = NA_character_,
            "-2" = NA_character_,
            "-1" = NA_character_),

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
        resp_soc_dist_1_bin_recode =
          case_when(
            G007_35_B %in% 3:4 ~ 1,
            G007_35_B %in% 1:2 ~ 0,
            TRUE ~ NA_real_),

    ############################
    ### GENERAL SOCIAL TRUST ###
    ############################

      # original question number; question text; response options (input above)
        resp_gentrust_qinfo = "NUM: Q31 [A165]; QTEXT: Generally speaking, would you say that most people can be trusted or that you can't be too careful in dealing with people?; ROPTIONS: 1 = Most people can be trusted [=0] + 2 = Can't be too careful [=1]",

      # original response (as character vector)
        resp_gentrust_original =
          dplyr::recode(
            as.character(A165),
            "-5" = NA_character_,
            "-4" = NA_character_,
            "-3" = NA_character_,
            "-2" = NA_character_,
            "-1" = NA_character_),

      # binary recode (numeric: 1 = any negative attitude expressed; 0 otherwise)
        resp_gentrust_bin_recode =
          case_when(
            A165 == 1 ~ 0,
            A165 == 2 ~ 1,
            TRUE ~ NA_real_),

    #########################
    ##### RELIGIOSITY #######
    #########################

      # original question number; question text; response options (input above)
        resp_religiosity_qinfo = "NUM: Q1 [A006]; QTEXT: Please say, for each of the following, how important it is in your life. Religion.; ROPTIONS: 1 = Very important + 2 = Rather important + 3 = Not very important + 4 = Not at all important",

      # original response (as numeric vector, with non-substantive responses coded as NA_real_)
        resp_religiosity_original =
          dplyr::recode(
            as.numeric(A006),
            `-5` = NA_real_,
            `-4` = NA_real_,
            `-3` = NA_real_,
            `-2` = NA_real_,
            `-1` = NA_real_),

      # recode (numeric: scaled 0-1, where 1 is more religious)
      # maps the 1-4 scale so that 1 ("Very important") -> 1 and 4 -> 0
        resp_religiosity_recode = (4 - resp_religiosity_original)/3

    ) %>%
    select(starts_with("resp_"))
```

# Save data

```{r}
 # write the cleaned wave-5 data frame to disk as an RDS file
 saveRDS(
   object = clean_evs5,
   file = "../cleaned-data/y-16-multi-european-values-study.rds"
 )
```
